{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.base import BaseEstimator, TransformerMixin\n",
    "from sklearn.pipeline import make_pipeline\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "from sklearn.preprocessing import OneHotEncoder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "emp_data = pd.read_csv('https://raw.githubusercontent.com/zekelabs/data-science-complete-tutorial/master/Data/HR_comma_sep.csv.txt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "emp_data.rename(columns={'sales':'dept'}, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": [
    "class SalaryMapper(BaseEstimator, TransformerMixin):\n",
    "    \n",
    "    def fit(self,X,Y=None):\n",
    "        return self\n",
    "    \n",
    "    def transform(self,X,Y=None):\n",
    "        db = {'low':1,'medium':2,'high':3}\n",
    "        print (type(X))\n",
    "        r = X.str.strip().replace(db)\n",
    "        return pd.DataFrame(r)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "cat_cols = emp_data.select_dtypes('object').columns.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "num_cols = emp_data.select_dtypes(exclude=['object']).columns.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['satisfaction_level',\n",
       " 'last_evaluation',\n",
       " 'number_project',\n",
       " 'average_montly_hours',\n",
       " 'time_spend_company',\n",
       " 'Work_accident',\n",
       " 'left',\n",
       " 'promotion_last_5years']"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "num_cols"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "num_cols.remove('left')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "num_pipeline = make_pipeline(MinMaxScaler())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [],
   "source": [
    "salary_pipeline = make_pipeline(SalaryMapper())\n",
    "dept_pipeline = make_pipeline(OneHotEncoder())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.compose import ColumnTransformer\n",
    "from sklearn.tree import DecisionTreeClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [],
   "source": [
    "ct = ColumnTransformer(\n",
    "      transformers=[\n",
    "          ('number_data', num_pipeline, num_cols),\n",
    "          ('salary_data', salary_pipeline, 'salary'),\n",
    "          ('dept_data', dept_pipeline, ['dept'])\n",
    "      ]\n",
    ")\n",
    "\n",
    "pipeline = make_pipeline(ct,DecisionTreeClassifier())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [],
   "source": [
    "trainX,testX,trainY,testY = train_test_split(emp_data.drop('left', axis=1), emp_data.left)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.series.Series'>\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/awantik/anaconda3/lib/python3.7/site-packages/sklearn/preprocessing/data.py:323: DataConversionWarning: Data with input dtype int64, float64 were all converted to float64 by MinMaxScaler.\n",
      "  return self.partial_fit(X, y)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Pipeline(memory=None,\n",
       "     steps=[('columntransformer', ColumnTransformer(n_jobs=None, remainder='drop', sparse_threshold=0.3,\n",
       "         transformer_weights=None,\n",
       "         transformers=[('number_data', Pipeline(memory=None,\n",
       "     steps=[('minmaxscaler', MinMaxScaler(copy=True, feature_range=(0, 1)))]), ['satisfaction_level', 'l...      min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
       "            splitter='best'))])"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pipeline.fit(trainX,trainY)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.series.Series'>\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0.9776"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pipeline.score(testX,testY)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
